/*===============================================================================
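This do-file imports regional GDP data from the Eurostat Regional Yearbook and
from national sources (Denmark, Finland, Austria, Switzerland, Spain, Sweden,
United Kingdom), keys every source on NUTS region codes, and saves the
combined data set as $dta_files/IC_EU_GDP.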

===============================================================================*/
* Start from an empty session before any data are imported.
clear all
* Do not pause the output with --more-- prompts while the script runs.
set more off

*===============================================================================
* Eurostat Regional Yearbook 
*===============================================================================
* Builds the tempfile EYB_GDP from the yearbook spreadsheet:
*   0. import the sheet (GDP per capita by region) and keep a full copy,
*   1. back out BE24 and BE31, which the yearbook only reports inside Brabant,
*   2. duplicate German NUTS1 regions that are identical to their NUTS2 region,
*   3. aggregate Italian regions and NL3 up to NUTS1,
* and finally stack everything on top of the original yearbook rows.
cd "$insheet_files/Eurostat Regional Yearbook"
import excel using EYB_GDP_1973.xlsx, clear first
note: "Countries: Germany, France, Italy, Belgium, Netherlands. Year: 1970. GDP per capita in units of local currency. Source: Eurostat yearbook 1973-74."
* Denmark is taken from its own national source in the Denmark section of
* this file, so the yearbook row is discarded.
drop if nuts=="DK"

* The tempfile macro is called EYB_full; it is always referenced by exactly
* that name (no ".dta" suffix inside the macro quotes).
tempfile EYB_full
save `EYB_full'

*-------------------------------------------------------------------------------
* 1. separate regions BE24 and BE31 from Brabant, BE1&BE24&BE31 
*-------------------------------------------------------------------------------
* Idea: total GDP of BE2 (BE3) minus the total GDP of the sub-regions that are
* reported individually leaves the GDP of the one sub-region that is not,
* i.e. BE24 (BE31). Totals are rebuilt as GDP per capita x age_total.

use nuts age_total using "$dta_files/IC_EU_AGE.dta" if ///
	(nuts=="BE2"|nuts=="BE3"|nuts=="BE21"|nuts=="BE22"|nuts=="BE23"| ///
	nuts=="BE25"|nuts=="BE32"|nuts=="BE33"|nuts=="BE34"|nuts=="BE35") 

merge 1:1 nuts using `EYB_full', keep(match master)

* NOTE(review): age_total is used as the population count here - confirm that
* this is the intended denominator of the yearbook per-capita figures.
gen gdp = gdp_per_capita * age_total 

* 1x1 matrices holding the NUTS1 totals
mkmat gdp if nuts=="BE2", mat(BE2)
mkmat gdp if nuts=="BE3", mat(BE3)

* Row vector of ones: pre-multiplying a 4x1 column by it sums the column.
* The products below are only conformable if all four sub-regions are present
* with non-missing GDP, so a missing region stops the script with an error.
matrix l=(1,1,1,1)
mkmat gdp if nuts=="BE21"|nuts=="BE22"|nuts=="BE23"|nuts=="BE25", mat(sumBE2)
matrix sumBE2=l*sumBE2
mkmat gdp if nuts=="BE32"|nuts=="BE33"|nuts=="BE34"|nuts=="BE35", mat(sumBE3)
matrix sumBE3=l*sumBE3

* Residuals = GDP of the regions hidden inside Brabant
matrix BE24=BE2-sumBE2
matrix BE31=BE3-sumBE3

* Turn the two 1x1 residual matrices into a two-row data set (nuts, gdp)
clear
svmat BE24, names(gdp)
gen nuts="BE24"
rename gdp1 gdp
tempfile BE_24
save `BE_24'

clear
svmat BE31, names(gdp)
gen nuts="BE31"
rename gdp1 gdp

append using `BE_24'

* Intermediate file with total GDP; it gets its own macro name so that it does
* not share a name with the final per-capita file BE_24_31 saved below.
tempfile BE_24_31_gdp
save `BE_24_31_gdp'

* Convert the residual totals back to GDP per capita so that they are in the
* same units as the yearbook rows.
use nuts age_total using "$dta_files/IC_EU_AGE.dta" if ///
	(nuts=="BE24"|nuts=="BE31")

merge 1:1 nuts using `BE_24_31_gdp', keep(match master)

gen gdp_per_capita = gdp / age_total 

keep nuts gdp_per_capita 

tempfile BE_24_31
save `BE_24_31'

*-------------------------------------------------------------------------------
* 2. Add duplicates for NUTS2==NUTS1 regions (e.g. add DE30 to DE3)
*-------------------------------------------------------------------------------
use `EYB_full', clear

* Only the five selected NUTS1 codes remain after the keep, so appending "0"
* to each code yields the matching NUTS2 code (DE3 -> DE30, DEC -> DEC0, ...).
keep if inlist(nuts,"DE3","DE5","DE6","DEC","DEF")
replace nuts = nuts + "0"

tempfile DE
save `DE'

*-------------------------------------------------------------------------------
* 3. Aggregate regions of IT (and NL3) to NUTS1 regions
*-------------------------------------------------------------------------------
use `EYB_full', clear

gen NUTS1=""
* NL3 consists of NL31-NL34. The previous condition tested NL31 twice, so NL32
* was never included in the NL3 aggregate.
replace NUTS1="NL3" if inlist(nuts,"NL31","NL32","NL33","NL34")
replace NUTS1="ITC" if inlist(nuts,"ITC1","ITC2","ITC3","ITC4")
replace NUTS1="ITF" if inlist(nuts,"ITF1","ITF2","ITF3","ITF4","ITF5","ITF6")
* ITH1 and ITH2 are reported jointly under the code "ITH1&ITH2"
replace NUTS1="ITH" if inlist(nuts,"ITH1&ITH2","ITH3","ITH4","ITH5")
replace NUTS1="ITI" if inlist(nuts,"ITI1","ITI2","ITI3","ITI4")
replace NUTS1="ITG" if inlist(nuts,"ITG1","ITG2")
drop if NUTS1==""

* NOTE(review): the yearbook sheet appears to hold gdp_per_capita and no
* variable called gdp (step 1 could not generate gdp otherwise), so "gdp" here
* presumably resolves by abbreviation to gdp_per_capita and this sums
* per-capita values across regions. A population-weighted mean looks like the
* intended aggregate - confirm before relying on the NUTS1 rows.
* NOTE(review): (first) region_name keeps the name of one sub-region as the
* label of the NUTS1 aggregate.
collapse (sum) gdp (first) region_name, by(NUTS1)

rename NUTS1 nuts

* Stack: NUTS1 aggregates + original yearbook rows + DE duplicates + BE24/BE31
append using `EYB_full'
append using `DE'
append using `BE_24_31'

tempfile EYB_GDP
save `EYB_GDP'

*===============================================================================
* Denmark
*===============================================================================
* Builds the tempfile DK_GDP: GDP summed at the level of the nuts codes in the
* spreadsheet, at the 3-character (NUTS1) level, and for the country as a whole.
* Tempfile macros are referenced by their declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
cd "$insheet_files/Denmark"
import excel DK_GDP_1983.xlsx, clear first

* One row per nuts code as given in the spreadsheet
* (presumably NUTS2, going by the tempfile name - confirm)
collapse (sum) gdp, by(nuts)

tempfile DK_nuts2
save `DK_nuts2'

* NUTS1 = first three characters of the code
gen nuts1 = substr(nuts, 1,3)

collapse (sum) gdp, by(nuts1)

rename nuts1 nuts

tempfile DK_nuts1
save `DK_nuts1'

* Country total
collapse (sum) gdp

gen nuts = "DK"

* Stack country, NUTS1 and detailed rows
append using `DK_nuts1'
append using `DK_nuts2'

tempfile DK_GDP
save `DK_GDP'

*===============================================================================
* Finland
*===============================================================================
* Builds the tempfile FI_GDP: GDP summed by nuts code.
cd "$insheet_files/Finland/"
import excel "FI_GDP_1970.xlsx", first clear
note: "Country: Finland. Year: 1970. Source: Finland income distribution 1970.pdf (1000 000 mk)"

* Rows sharing a nuts code are added up into one observation per code
collapse (sum) gdp, by(nuts)

* The tempfile macro is referenced by its declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
tempfile FI_GDP
save `FI_GDP'

*===============================================================================
* Austria
*===============================================================================
* Builds the tempfile AT_GDP: the rows of the spreadsheet summed by nuts code,
* plus NUTS1 rows aggregated from the 4-character (NUTS2) codes.
* Tempfile macros are referenced by their declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
cd "$insheet_files/Austria"
import excel "AT_GDP_1971.xlsx", first clear
note: "Country: Austria. Year: 1971. Source: Norbert, Jeglitsch - 1976 - Das Inlandsprodukt nach politischen Bezirken im Jahre 1971.pdf (WIFO-Monatsberichte 2/1976, 54-63. Variable: GDP in mln. shilling. Regions match precisely for NUTS2, but somewhat imprecisely for NUTS3!)"

* One row per nuts code; the region name of the first row is kept as label
collapse (sum) gdp (first) region_name, by(nuts)

* Going by the tempfile name, the sheet covers NUTS levels 0, 2 and 3
tempfile AT_nuts_0_2_3
save `AT_nuts_0_2_3'

* Gen NUTS 1 observation 
* Only 4-character codes (NUTS2) enter the sum, so that other levels present
* in the file are not counted on top of them.
keep if length(nuts)==4
gen nuts1 = substr(nuts,1,3)

collapse (sum) gdp, by(nuts1)

rename nuts1 nuts
append using `AT_nuts_0_2_3'

tempfile AT_GDP
save `AT_GDP'

*===============================================================================
* Switzerland
*===============================================================================
* Builds the tempfile CH_GDP: the spreadsheet is used exactly as imported.
cd "$insheet_files/Switzerland"
import excel "CH_GDP_1970.xlsx", first clear

* The tempfile macro is referenced by its declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
tempfile CH_GDP
save `CH_GDP'
*===============================================================================
* Spain
*===============================================================================
* Builds the tempfile ES_GDP: the rows of the spreadsheet, plus NUTS1 rows
* aggregated from the 4-character (NUTS2) codes, plus a country total.
* Tempfile macros are referenced by their declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
cd "$insheet_files/Spain"
* Only cells A8:C70 of the sheet hold the table; row 8 supplies variable names
import excel "ES_GDP_1980.xls", first cellrange(A8:C70) clear

* Going by the tempfile name, the sheet covers NUTS levels 2 and 3
tempfile ES_nuts_2_3
save `ES_nuts_2_3'

* Aggregate from the 4-character codes only, so that more detailed rows are
* not counted on top of them.
keep if length(nuts)==4

gen nuts1 = substr(nuts,1,3)

collapse (sum) gdp, by(nuts1)

rename nuts1 nuts

tempfile ES_nuts1
save `ES_nuts1'

* Country total
collapse (sum) gdp

gen nuts= "ES"

* Stack country, NUTS1 and original rows
append using `ES_nuts1'
append using `ES_nuts_2_3'

tempfile ES_GDP
save `ES_GDP'

*===============================================================================
* Sweden
*===============================================================================
* Builds the tempfile SE_GDP: total GDP at the level of the spreadsheet codes
* and aggregated to 4-character, 3-character and country codes.
* Tempfile macros are referenced by their declared name (no ".dta" suffix
* inside the macro quotes), matching the final append of this file.
cd "$insheet_files/Sweden"
import excel using "SE_GDP_1973.xlsx", first clear

* The population file is keyed on nuts and year
gen year = 1973
merge 1:1 nuts year using "$dta_files/SE_labor", keep(match master) keepusing(POP)
* NOTE(review): regions without a population match are kept with missing POP;
* their gdp is then missing and adds nothing to the sums below. _merge is
* also left in the data and ends up in the saved tempfile.

* Total GDP = GDP per capita x population
* (POP is multiplied by 1000, presumably because it is stored in thousands -
* confirm against SE_labor)
gen gdp = gdp_per_capita*POP*1000 

tempfile pop_nuts3
save `pop_nuts3'

* Aggregate upwards by truncating the region code one character at a time
gen nuts2 = substr(nuts,1,4) 

collapse (sum) gdp, by(nuts2)

rename nuts2 nuts
tempfile pop_nuts2
save `pop_nuts2'

gen nuts1 = substr(nuts,1,3) 

collapse (sum) gdp, by(nuts1)

rename nuts1 nuts
tempfile pop_nuts1
save `pop_nuts1'

gen country=substr(nuts,1,2)

collapse (sum) gdp, by(country)

rename country nuts

* Stack country, NUTS1, NUTS2 and the original rows
append using `pop_nuts1'
append using `pop_nuts2'
append using `pop_nuts3'

tempfile SE_GDP
save `SE_GDP'

*===============================================================================
* United Kingdom
* Regional Accounts Data, 1971-1999
* http://discover.ukdataservice.ac.uk/catalogue?sn=4010
*===============================================================================
* Leaves the UK data in memory (variables nuts, gdp, gdp_per_capita), one row
* per nuts code, with three Welsh counties split between UKL1 and UKL2 and
* with UKI duplicated as UKI1 and UKI2.
clear
set more off 

cd "$insheet_files/UK"

import excel "gdpcty7796.xls", first cellrange(A11:Z100) 

drop if nuts==""

keep nuts GDP1977 gdp_per_capita1977

* Give the 1977 columns their working names straight away. The code below
* used to reach them only through variable-name abbreviation (GDP for GDP1977,
* gdp_per_capita for gdp_per_capita1977), which stops working under
* "set varabbrev off" and hides which column is being used.
rename GDP1977 GDP
rename gdp_per_capita1977 gdp_per_capita

* Each county below straddles UKL1 and UKL2. Its row is duplicated and each
* copy receives the share of GDP given by the 1981 population of the districts
* on that side. gdp_per_capita is left as it is on both copies.

* Dyfed & Powys. This is modern UKL14 & UKL24, (basically all of central Wales.) 
* Reallocate GDP proportionally based on population in 1981.
* Population in 1981 http://www.citypopulation.de/php/uk-admin.php	
local pop_ukl1 = 107400+165100+61200
local pop_ukl2 = 112200

expand 2 if nuts=="DYFED&POWYS"
bys nuts: gen n=_n

replace GDP = GDP * (`pop_ukl1'/(`pop_ukl1'+`pop_ukl2')) if nuts=="DYFED&POWYS" & n==1
replace nuts = "UKL1" if nuts=="DYFED&POWYS" & n==1

replace GDP = GDP * (`pop_ukl2'/(`pop_ukl1'+`pop_ukl2')) if nuts=="DYFED&POWYS" & n==2
replace nuts = "UKL2" if nuts=="DYFED&POWYS" & n==2
drop n 

*CLWYD. Reallocate GDP proportionally based on population in 1981.
* UKL1 side: Denbighshire & Conwy.  UKL2 side: Flintshire + Wrexham.
local pop_ukl1 = 86700+99000
local pop_ukl2 = 138600+119200

expand 2 if nuts=="CLWYD"
bys nuts: gen n=_n

replace GDP = GDP * (`pop_ukl1'/(`pop_ukl1'+`pop_ukl2')) if nuts=="CLWYD" & n==1
replace nuts = "UKL1" if nuts=="CLWYD" & n==1

replace GDP = GDP * (`pop_ukl2'/(`pop_ukl1'+`pop_ukl2')) if nuts=="CLWYD" & n==2
replace nuts = "UKL2" if nuts=="CLWYD" & n==2
drop n 

* GWENT. Reallocate GDP proportionally based on population in 1981.
* UKL1 side: Blaenau Gwent, Caerphilly, Torfaen.
* UKL2 side: Monmouthshire and Newport.
local pop_ukl1 = 75700+171800+113200
local pop_ukl2 = 76500+132400

expand 2 if nuts=="GWENT"
bys nuts: gen n=_n

replace GDP = GDP * (`pop_ukl1'/(`pop_ukl1'+`pop_ukl2')) if nuts=="GWENT" & n==1
replace nuts = "UKL1" if nuts=="GWENT" & n==1

replace GDP = GDP * (`pop_ukl2'/(`pop_ukl1'+`pop_ukl2')) if nuts=="GWENT" & n==2
replace nuts = "UKL2" if nuts=="GWENT" & n==2
drop n 

* Rescale GDP (presumably from millions to units - confirm against the source)
* and back out the population implied by GDP and GDP per capita.
replace GDP = GDP*1000000
gen POP = GDP/gdp_per_capita

* Collapse to one row per nuts code: GDP is summed, GDP per capita is the
* population-weighted mean. The two collapses run on the same saved data and
* are joined again on nuts; assert(3) stops the script unless every code is
* present on both sides.
tempfile precollapse
save `precollapse'

collapse (sum) GDP, by(nuts)

tempfile postcollapse
save `postcollapse'

use `precollapse', clear

collapse (mean) gdp_per_capita [aw=POP], by(nuts)

merge 1:1 nuts using `postcollapse', nogen assert(3)

*UKI1 & UKI2 are not broken out separately, so create duplicate entries 
* (the original UKI row is kept; copies 2 and 3 carry the same values)
expand 3 if nuts=="UKI"
bys nuts: gen n=_n

replace nuts = "UKI1" if nuts=="UKI" & n==2
replace nuts = "UKI2" if nuts=="UKI" & n==3

* Lower-case name, as used for total GDP in the other country sections
rename GDP gdp

drop n

*===============================================================================
* Merge all files
*===============================================================================
* The UK data built in the preceding section are in memory at this point.
* The Eurostat yearbook rows are added with a 1:1 merge on nuts: the two
* sources cover different countries, so this acts like an append, while
* stopping with an error if a nuts code is repeated within either file.
* The merge leaves a _merge variable in the final data set.
* All tempfile macros are referenced by their declared name (no ".dta" suffix
* inside the macro quotes), the same way for all seven files.
merge 1:1 nuts using `EYB_GDP'

append using `AT_GDP'
append using `CH_GDP' 
append using `DK_GDP'
append using `ES_GDP'
append using `FI_GDP'
append using `SE_GDP'

save "$dta_files/IC_EU_GDP", replace


